{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyOG23tHLlhtJZiIsQMYUphC",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/Huangjian2013/ai-demo/blob/main/rag/07-%E5%88%86%E8%AF%8D.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "u8j8EA5xOV_f"
      },
      "outputs": [],
      "source": [
        "!pip install tiktoken --quiet\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import tiktoken\n",
        "\n",
        "encoding = tiktoken.encoding_for_model(\"gpt-3.5-turbo\")\n",
        "\n",
        "def encode_text(text):\n",
        "    print(\"===\" + text)\n",
        "    print(encoding.encode(text))\n",
        "\n",
        "encode_text(\"极越01\")\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "nb6gITWwOiLs",
        "outputId": "0cee53c8-f703-4b4a-dba4-b59b1705084c"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "===极越01\n",
            "[20119, 223, 50266, 232, 1721]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(encoding.decode([20119]))\n",
        "print(encoding.decode([223]))\n",
        "print(encoding.decode([50266]))\n",
        "print(encoding.decode([232]))\n",
        "print(encoding.decode([1721]))\n",
        "print(encoding.decode([20119, 223, 50266, 232, 1721]))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ZLsHaJvgO84l",
        "outputId": "646dc37d-802c-4fb1-cbf6-87faf04c7249"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "�\n",
            "�\n",
            "�\n",
            "�\n",
            "01\n",
            "极越01\n"
          ]
        }
      ]
    }
  ]
}